{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "import json\n",
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "data_file = '/Users/sarabeery/Documents/CameraTrapClass/DataWrangling/Databases/combined_project_database_locs.json'\n",
    "with open(data_file, 'r') as f:\n",
    "     data = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "tax_file = '/Users/sarabeery/Documents/CameraTrapClass/DataWrangling/CreatingDatabases/category_data_with_taxonomy.json'\n",
    "with open(tax_file, 'r') as f:\n",
    "     taxonomy = json.load(f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "db_file = '/Users/sarabeery/Documents/CameraTrapClass/DataWrangling/CreatingDatabases/inat_category_lookup.p'\n",
    "inat_cat_lookup = pickle.load(open(db_file,'rb'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[u'car', 33]\n",
      "[u'empty', 30]\n"
     ]
    }
   ],
   "source": [
    "images = data['images']\n",
    "categories = data['categories']\n",
    "annotations = data['annotations']\n",
    "# locations = data['locations']\n",
    "cat_names = [[cat['name'],cat['id']] for cat in categories]\n",
    "for cat in cat_names:\n",
    "    if cat[0] not in inat_cat_lookup:\n",
    "        print(cat)\n",
    "cat_to_id = {}\n",
    "for cat in categories:\n",
    "    cat_to_id[cat['name']] = cat['id']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5089\n",
      "{u'supercategory': u'Insecta', u'ancestors': [{u'name': u'Animalia', u'rank': u'kingdom'}, {u'name': u'Arthropoda', u'rank': u'phylum'}, {u'name': u'Hexapoda', u'rank': u'subphylum'}, {u'name': u'Insecta', u'rank': u'class'}, {u'name': u'Pterygota', u'rank': u'subclass'}, {u'name': u'Coleoptera', u'rank': u'order'}, {u'name': u'Polyphaga', u'rank': u'suborder'}, {u'name': u'Staphyliniformia', u'rank': u'suborder'}, {u'name': u'Staphylinoidea', u'rank': u'superfamily'}, {u'name': u'Silphidae', u'rank': u'family'}, {u'name': u'Nicrophorinae', u'rank': u'subfamily'}, {u'name': u'Nicrophorus', u'rank': u'genus'}], u'id': 1916, u'name': u'Nicrophorus tomentosus'}\n"
     ]
    }
   ],
   "source": [
    "print(len(taxonomy))\n",
    "print(taxonomy[0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 115,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "spec_to_id = {}\n",
    "id_to_taxa = {}\n",
    "for spec in taxonomy:\n",
    "    spec_to_id[spec['name']] = spec['id']\n",
    "    id_to_taxa[spec['id']] = spec"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 116,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "camtrap_spec = set(inat_cat_lookup.values())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 117,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "5089\n"
     ]
    }
   ],
   "source": [
    "inat_spec = set(spec_to_id.keys())\n",
    "print(len(inat_spec))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 121,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "not in inat: Zonotrichia, sparrow\n",
      "not in inat: Troglodytidae, wren\n",
      "not in inat: Colaptes, flicker\n",
      "not in inat: Homo erectus, human_bike\n",
      "not in inat: Muridae, rodent\n",
      "not in inat: Spilogale gracilis, spotted_skunk\n",
      "not in inat: Mammalia, small mammal\n",
      "not in inat: Homo erectus, human\n",
      "not in inat: Mustelidae, badger\n",
      "not in inat: Cervus canadensis, elk\n",
      "not in inat: Corvidae, jay\n",
      "not in inat: Aspidoscelis, whiptail\n",
      "not in inat: Polioptilidae, gnatcatcher\n",
      "not in inat: Chiroptera, bat\n",
      "not in inat: Bubo, horned_owl\n",
      "not in inat: Dipodomys merriami, krat\n",
      "not in inat: Vulpes, fox\n",
      "not in inat: Aves, bird\n",
      "not in inat: Vulpes macrotis, kit_fox\n",
      "not in inat: car, human_ohv\n",
      "not in inat: Turdus, thrush\n",
      "not in inat: Lacertilia, herp\n",
      "not in inat: Bos taurus, cow\n",
      "not in inat: Columbina, ground_dove\n",
      "not in inat: Strigiformes, owl\n",
      "not in inat: Gymnorhinus cyanocephalus, pinyon_jay\n",
      "not in inat: Didelphis, opossum\n",
      "not in inat: Sciuridae, squirrel\n",
      "not in inat: Leporidae, rabbit\n",
      "not in inat: Mimidae, thrasher\n",
      "not in inat: Molothrus, cowbird\n",
      "not in inat: Lacertilia, lizard\n",
      "not in inat: Lepidoptera, moth\n",
      "not in inat: Petronia petronia, rock_sparrow\n",
      "not in inat: Cervidae, deer\n",
      "not in inat: Lepus, jackrabbit\n",
      "not in inat: Coturnix coturnix, quail\n",
      "not in inat: Picidae, woodpecker\n",
      "not in inat: Mus, mouse\n",
      "not in inat: Peromyscus californicus, california_mouse\n",
      "not in inat: Dipodomys merriami, kangaroo_rat\n",
      "not in inat: Spinus, finch\n",
      "not in inat: Lepidoptera, butterfly\n",
      "not in inat: Accipitridae, hawk\n",
      "not in inat: Calypte, hummingbird\n",
      "not in inat: Canis familiaris, dog\n",
      "not in inat: Mephitidae, skunk\n"
     ]
    }
   ],
   "source": [
    "old_id_to_new_id = {}\n",
    "for cat in inat_cat_lookup:\n",
    "    if cat in cat_to_id:\n",
    "        if inat_cat_lookup[cat] in spec_to_id:\n",
    "            old_id = cat_to_id[cat]\n",
    "            new_id = spec_to_id[inat_cat_lookup[cat]]\n",
    "            old_id_to_new_id[old_id] = new_id\n",
    "        else:\n",
    "            print('not in inat: ' +inat_cat_lookup[cat] +', '+ cat)\n",
    "#     else:\n",
    "#         print cat\n",
    "pickle.dump(old_id_to_new_id,open(\"old_id_to_new_id.p\", \"wb\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 93,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Sciuridae', 'Zonotrichia', 'Mus', 'Canis familiaris', 'Lacertilia', 'Turdus', 'Picidae', 'Coturnix coturnix', 'Lepidoptera', 'Didelphis', 'Aves', 'Bubo', 'Strigiformes', 'Vulpes macrotis', 'Molothrus', 'Homo erectus', 'Corvidae', 'Leporidae', 'Peromyscus californicus', 'Lepus', 'Aspidoscelis', 'Petronia petronia', 'Spilogale gracilis', 'Calypte', 'Colaptes', 'Cervidae', 'Gymnorhinus cyanocephalus', 'Columbina', 'Cervus canadensis', 'Mustelidae', 'Dipodomys merriami', 'Troglodytidae', 'Spinus', 'Accipitridae', 'Polioptilidae', 'Vulpes', 'Muridae', 'Ammospermophilus', 'Mimidae', 'car', 'Bos taurus', 'Chiroptera', 'Mephitidae', 'Mammalia', 'Sus']\n"
     ]
    }
   ],
   "source": [
    "diff = camtrap_spec - inat_spec\n",
    "print(list(diff))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 94,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Canis familiaris', 'Coturnix coturnix', 'Vulpes macrotis', 'Homo erectus', 'Peromyscus californicus', 'Petronia petronia', 'Spilogale gracilis', 'Gymnorhinus cyanocephalus', 'Cervus canadensis', 'Dipodomys merriami', 'Bos taurus']\n"
     ]
    }
   ],
   "source": [
    "species_not_in_inat = [spec for spec in diff if ' ' in spec]\n",
    "macro_cats = [spec for spec in diff if ' ' not in spec]\n",
    "print(species_not_in_inat)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 86,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['Sciuridae', 'Zonotrichia', 'Mus', 'Lacertilia', 'Turdus', 'Picidae', 'Lepidoptera', 'Didelphis', 'Aves', 'Bubo', 'Strigiformes', 'Molothrus', 'Corvidae', 'Leporidae', 'Lepus', 'Aspidoscelis', 'Calypte', 'Colaptes', 'Cervidae', 'Columbina', 'Mustelidae', 'Troglodytidae', 'Spinus', 'Accipitridae', 'Polioptilidae', 'Vulpes', 'Muridae', 'Ammospermophilus', 'Mimidae', 'car', 'Chiroptera', 'Mephitidae', 'Mammalia', 'Sus']\n"
     ]
    }
   ],
   "source": [
    "print(macro_cats)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 96,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "newCats = [cat for cat in categories]\n",
    "numSpec = len(inat_spec)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "new_id_to_old_id = []\n",
    "for cat in newCats:\n",
    "    cat['oldName'] = cat['name']\n",
    "    cat['name'] = inat_cat_lookup[cat['name']]\n",
    "    if cat['name'] in spec_to_id:\n",
    "        cat['old_id'] = cat['id']\n",
    "        cat['id'] = spec_to_id[cat['name']]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
